# Replication data for:
# "Artificial intelligence in breast cancer screening: Primary care provider preferences"
# By Nathaniel Hendrix, A. Brett Hauber, Christoph I. Lee, Aasthaa Bansal, and David L. Veenstra

########################
#### Data wrangling ####
########################

library(here)
library(dplyr)

# load experimental design
design <- read.csv(here("ai_pcp_design.csv"))

# Reshape the design from one row per task (two alternatives side by side)
# to one row per alternative so it can be merged with responses.
# Columns 1-7 are the task number plus six attribute levels for the first
# alternative; columns 8-13 are the six attribute levels for the second.
attribute_cols <- c("task", "sens", "spec", "rad", "evid", "tran", "repr")
first_alt <- setNames(design[, 1:7], attribute_cols)
second_alt <- setNames(design[, c(1, 8:13)], attribute_cols)
design <- rbind(first_alt, second_alt)

# Sort by task. order() leaves ties in their original order, so within each
# task the first alternative stays ahead of the second.
design <- design[order(design$task), ]

# Add dummy coding to the design: for every attribute, one 0/1 indicator for
# level code 2 and one for level code 3. Any other code (presumably level 1,
# the omitted category) gets 0 in both indicators.
dummy_names <- list(
  sens = c("sens_89", "sens_94"),
  spec = c("spec_89", "spec_94"),
  rad  = c("rad_part", "rad_all"),
  evid = c("evid_rct", "evid_both"),
  tran = c("tran_part", "tran_full"),
  repr = c("repr_med", "repr_high")
)
for (attribute in names(dummy_names)) {
  level <- design[[attribute]]
  design[[dummy_names[[attribute]][1]]] <- ifelse(level == 2, 1, 0)
  design[[dummy_names[[attribute]][2]]] <- ifelse(level == 3, 1, 0)
}

# keep only the task number (column 1) and the 12 indicators (columns 8-19)
design <- design[, c(1, 8:19)]

# clean up intermediate objects
rm(first_alt, second_alt, attribute_cols, dummy_names, attribute, level)

# load DCE responses
# ("UTF-8-BOM" makes read.csv skip a leading byte-order mark, so the first
# column name is read cleanly)
df <- read.csv(here("ai_pcp_raw.csv"), fileEncoding = "UTF-8-BOM")

# create framework of analytic dataset by repeating experimental design:
# one full copy of `design` per respondent, stacked in respondent order
analysis <- do.call("rbind", replicate(nrow(df), design, simplify = FALSE))

# Label each copy with its respondent. The block size is taken from the design
# itself rather than hard-coded: with a fixed constant, a design whose size is
# a multiple of that constant would be silently recycled into wrong ids.
analysis$id <- rep(df$participant_id, each = nrow(design))

# Within each task the design holds the two alternatives consecutively, so the
# option number simply alternates 1, 2 down the whole table.
analysis$option <- rep(1:2, times = nrow(analysis) %/% 2)

# add choices and opt-out
#
# Layout of the response file used here: for task t, column 2 * t holds the
# option the respondent picked and column 2 * t + 1 holds the opt-out
# question, where a value of 2 is counted as opting out.
#
# The lookup is done once per task instead of once per analysis row. A missing
# response is recorded as 0 (not chosen / not opted out); selecting rows of
# `df` with an NA condition would instead return one all-NA row and make a
# missing answer look like a match.
if (anyDuplicated(df$participant_id) > 0) {
  stop("participant_id must be unique in the response file", call. = FALSE)
}
respondent_row <- match(analysis$id, df$participant_id)
task_number <- as.numeric(analysis$task)

analysis$choice <- rep(0, nrow(analysis))
analysis$optout <- rep(0, nrow(analysis))
for (task in unique(task_number)) {
  in_task <- task_number == task
  picked <- df[respondent_row[in_task], 2 * task]
  opted <- df[respondent_row[in_task], 1 + 2 * task]
  analysis$choice[in_task] <-
    as.numeric(!is.na(picked) & picked == analysis$option[in_task])
  analysis$optout[in_task] <- as.numeric(!is.na(opted) & opted == 2)
}

# merge in respondent characteristics
# (column 1 of the response file is the participant id, columns 32-38 are the
# respondent-level variables)
respondents <- df[, c(1, 32:38)]
colnames(respondents)[1] <- "id"
respondents$female <- ifelse(respondents$gender == 1, 1, 0)

# left join: every analysis row is kept and gains its respondent's columns
analysis <- merge(x = analysis, y = respondents, by = "id", all.x = TRUE)

# merge() does not keep the previous row order, so restore
# respondent / task / option order explicitly
row_order <- with(analysis, order(id, task, option))

# After the merge the columns are id, task, the 12 attribute indicators,
# option, choice, optout, then the respondent variables. Move option (column
# 15) up next to task so the layout becomes id, task, option, indicators,
# choice, optout, respondent variables.
col_order <- c(1, 2, 15, 3:14, 16:ncol(analysis))
analysis <- analysis[row_order, col_order]
rm(row_order, col_order)

# the above yields a dummy coded dataset

# convert to effects coding: for each attribute, rows where both indicators
# are 0 (the omitted level) get -1 in both indicators instead
analysis_effects <- analysis
indicator_pairs <- list(
  c("sens_89", "sens_94"),
  c("spec_89", "spec_94"),
  c("rad_part", "rad_all"),
  c("evid_rct", "evid_both"),
  c("tran_part", "tran_full"),
  c("repr_med", "repr_high")
)
for (pair in indicator_pairs) {
  at_omitted_level <- analysis_effects[[pair[1]]] == 0 &
    analysis_effects[[pair[2]]] == 0
  analysis_effects[at_omitted_level, pair] <- -1
}
rm(indicator_pairs, pair, at_omitted_level)

# create version of analysis file with ACS for opt out
# NOTE(review): "ACS" presumably refers to the alternative-specific constant
# for the opt-out alternative -- confirm.
#
# A third alternative (option 3) is created for every respondent-task by
# copying the option-1 rows and overwriting columns by position. With the
# column order set when respondent characteristics were merged in, the
# positions are: 3 = option, 4-5 = sens_89/sens_94, 6-7 = spec_89/spec_94,
# 8-15 = rad/evid/tran/repr indicators, 16 = choice.
analysis_temp <- analysis[analysis$option == 1, ]
analysis_temp[, 3] <- 3       # option number of the opt-out alternative
analysis_temp[, 4:5] <- -1    # sens_89, sens_94
analysis_temp[, 6] <- 1       # spec_89
analysis_temp[, 7:15] <- 0    # spec_94 and all rad/evid/tran/repr indicators
# the opt-out alternative is marked as chosen exactly when optout is 1
analysis_temp[, 16] <- ifelse(analysis_temp$optout == 1, 1, 0)

# in the effects-coded rows, a task where the respondent opted out has
# neither of the two designed options marked as chosen
opted_out <- analysis_effects$optout == 1
analysis_effects[opted_out, "choice"] <- 0
# temporary value only; optout is recoded from the option number below
analysis_effects[, "optout"] <- 1

# stack the opt-out rows with options 1 and 2 and sort so that each
# respondent-task occupies three consecutive rows
analysis_acs <- rbind(analysis_temp, analysis_effects)
row_order <- with(analysis_acs, order(id, task, option))
analysis_acs <- analysis_acs[row_order, ]

# indid numbers the choice sets: one value per block of three rows
analysis_acs$indid <- rep(1:(nrow(analysis_acs) / 3), each = 3)

# optout ends up as -1 on options 1-2 and 1 on the opt-out alternative
is_designed_option <- analysis_acs$option < 3
is_opt_out_option <- analysis_acs$option == 3
analysis_acs[is_designed_option, "optout"] <- -1
analysis_acs[is_opt_out_option, "optout"] <- 1
rm(opted_out, row_order, is_designed_option, is_opt_out_option)

# bin respondent characteristics for Stata
# Every raw respondent variable is replaced by 0/1 indicators; the raw
# columns are removed once all indicators have been computed.

# practice region: one indicator each for Midwest, Northeast and South
# (any other region value is 0 on all three)
for (region in c("Midwest", "Northeast", "South")) {
  analysis_acs[[tolower(region)]] <-
    ifelse(analysis_acs$practice_region == region, 1, 0)
}

# NOTE(review): standard RUCA primary codes 1-3 are metropolitan, so `>= 4`
# would flag non-metropolitan practices under the name `urban` -- confirm how
# practice_ruca is coded in the raw file.
analysis_acs$urban <- ifelse(analysis_acs$practice_ruca >= 4, 1, 0)

analysis_acs$tech_negative <- ifelse(analysis_acs$tech_attitude < 3, 1, 0)
analysis_acs$rad_contact_med_high <- ifelse(analysis_acs$rad_contact > 2, 1, 0)

# NOTE(review): the name says moderate/high trust but the condition selects
# values below 4 -- confirm the direction of the rad_trust scale.
analysis_acs$rad_trust_mod_high <- ifelse(analysis_acs$rad_trust < 4, 1, 0)

# drop the raw variables (gender is already represented by `female`)
raw_cols <- c("practice_region", "gender", "practice_ruca",
              "tech_attitude", "rad_contact", "rad_trust")
for (raw_col in raw_cols) {
  analysis_acs[[raw_col]] <- NULL
}
rm(region, raw_cols, raw_col)

# write file
write.csv(analysis_acs, here("ai_pcp_processed.csv"), row.names = FALSE)

######################
## Convert to using different reference levels
######################

# Re-express each attribute's effects coding so that the highest level becomes
# the -1/-1 reference. For every attribute:
#   1. add an indicator for the old reference level (both indicators == -1)
#   2. on those rows, set the middle-level indicator to 0
#   3. on rows at the highest level, set the new and the middle indicator to -1
#   4. drop the highest-level indicator
# Rows where both original indicators are 0 keep 0 in the new indicator too.
level_sets <- list(
  c(low = "sens_85",   mid = "sens_89",   high = "sens_94"),
  c(low = "spec_85",   mid = "spec_89",   high = "spec_94"),
  c(low = "rad_none",  mid = "rad_part",  high = "rad_all"),
  c(low = "evid_obs",  mid = "evid_rct",  high = "evid_both"),
  c(low = "tran_none", mid = "tran_part", high = "tran_full"),
  c(low = "repr_low",  mid = "repr_med",  high = "repr_high")
)
for (levels_i in level_sets) {
  low_col <- levels_i[["low"]]
  mid_col <- levels_i[["mid"]]
  high_col <- levels_i[["high"]]

  analysis_acs[[low_col]] <- ifelse(analysis_acs[[mid_col]] == -1 &
                                      analysis_acs[[high_col]] == -1, 1, 0)
  analysis_acs[analysis_acs[[low_col]] == 1, mid_col] <- 0
  analysis_acs[analysis_acs[[high_col]] == 1, c(low_col, mid_col)] <- -1
  analysis_acs[[high_col]] <- NULL
}
rm(level_sets, levels_i, low_col, mid_col, high_col)

# write the re-referenced file alongside the first one
write.csv(analysis_acs, here("ai_pcp_processed2.csv"), row.names = FALSE)


